# NOTE(review): this wipes every object in the calling environment. It is kept
# so the script behaves as before, but consider running the script in a fresh
# R session instead.
rm(list = ls())
library(ROCR)
library(tidyverse)

# Make the relative 'Data/...' and 'Figures/...' paths below resolve against
# this script's folder when it is run from an RStudio editor tab. Outside
# RStudio (e.g. Rscript), or for an unsaved document whose path is "", the
# current working directory is left untouched instead of raising an error.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  local({
    script_path <- rstudioapi::getActiveDocumentContext()$path
    if (nzchar(script_path)) {
      setwd(dirname(script_path))
    }
  })
}

# Load the prediction table and clean it one step at a time.
df <- readRDS('Data/Prediction/img5_weighted_opencv_cov2f_nofw_level.rds')

# In every column whose name matches 'ff', replace 'ff' with 'im'.
df <- rename_at(df, vars(matches('ff')), ~ gsub('ff', 'im', .x))

# Keep only rows where none of the listed race.* columns is missing.
df <- drop_na(
  df,
  race.mt, race.s, race.gs, race.lstm, race.fbisg,
  race.lstm_nc, race.lstm_wk, race.im, race.hybrid
)

# Keep only the columns whose names match the pattern, then drop duplicate rows.
df <- select(df, matches('lstm_wk|gs|fbisg|hybrid|ff|^s\\.|^im\\.|race|full_name'))
df <- distinct(df)

# Race codes used as the suffix of every per-race probability column.
eth <- c("whi", "bla", "his", "asi")
# Names of the 0/1 truth-indicator columns: "SR." followed by the upper-cased code.
SR  <- paste0("SR.", toupper(eth))
# Build the per-model evaluation table for one prediction type.
#
# Arguments:
#   predtype  Prediction-type label such as '.s' or '.lstm_wk'. Every '.' is
#             removed to obtain the column prefix, so '.s' reads the columns
#             's.whi', 's.bla', ...
#   data      Data frame holding a 'race.mt' column plus one '<prefix>.<race>'
#             column per race code. Defaults to the global `df`.
#   races     Character vector of race codes. Defaults to the global `eth`.
#
# Returns a data frame with, for each race code in order, a 'pred.<race>'
# column (a copy of '<prefix>.<race>') followed by an 'SR.<RACE>' column that
# is 1 where race.mt equals the code and 0 otherwise.
#
# Stops with a message naming the missing columns if `data` lacks any of them.
sepdf <- function(predtype, data = df, races = eth) {
  prefix   <- gsub(".", "", predtype, fixed = TRUE)
  src_cols <- paste0(prefix, ".", races)

  missing_cols <- setdiff(c("race.mt", src_cols), names(data))
  if (length(missing_cols) > 0) {
    stop("sepdf(): column(s) not found in data: ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  pred_cols <- paste0("pred.", races)
  sr_cols   <- paste0("SR.", toupper(races))

  out <- data[, c("race.mt", src_cols), drop = FALSE]
  for (i in seq_along(races)) {
    out[[pred_cols[i]]] <- out[[src_cols[i]]]
    out[[sr_cols[i]]]   <- ifelse(out[["race.mt"]] == races[i], 1, 0)
  }

  # Interleave as pred.<race>, SR.<RACE> per race, dropping the source columns.
  out[c(rbind(pred_cols, sr_cols))]
}

# One evaluation table per model, keyed by the name used for that model in the
# rest of the script; the value is the prediction-type label passed to sepdf().
pred <- lapply(
  c(df.s      = '.s',
    df.gs     = '.gs',
    df.fbisg  = '.fbisg',
    df.lstm   = '.lstm_wk',
    df.im     = '.im',
    df.hybrid = '.hybrid'),
  sepdf
)

## ROC points and AUC for every model in `pred` and every race code in `eth`.

# One row per model, one numeric column per race code, plus the model name.
AUC <- data.frame(
  matrix(NA_real_, nrow = length(pred), ncol = length(eth),
         dimnames = list(NULL, eth)),
  model = names(pred),
  stringsAsFactors = FALSE
)
cut <- seq(0, 1, 0.001)

# False/true positive rates of `preds >= threshold` for each threshold in `cut`.
#   preds  numeric scores
#   truth  0/1 indicator, same length as preds
#   cut    numeric vector of thresholds
# Returns a data frame with columns fpr and tpr, one row per threshold.
roc_points <- function(preds, truth, cut) {
  is_neg <- !is.na(truth) & truth == 0
  is_pos <- !is.na(truth) & truth == 1
  # Class totals do not depend on the threshold, so count them once.
  n_neg <- sum(is_neg)
  n_pos <- sum(is_pos)

  rates <- vapply(cut, function(threshold) {
    flagged <- !is.na(preds) & preds >= threshold
    if (!any(flagged)) {
      # Nothing is predicted positive at this threshold. tpr is recorded as 1
      # here; the relabelling step later in the script resets points with
      # fpr == 0 and tpr == 1 to tpr = 0.
      c(0, 1)
    } else {
      c(sum(flagged & is_neg) / n_neg, sum(flagged & is_pos) / n_pos)
    }
  }, numeric(2))

  fpr <- rates[1, ]
  tpr <- rates[2, ]

  # Last row: recomputed at threshold 1 from the share of each class that
  # falls below 1, which also covers the case where no score reaches 1.
  last <- length(cut)
  below_one <- !is.na(preds) & preds < 1
  fpr[last] <- 1 - sum(below_one & is_neg) / n_neg
  tpr[last] <- 1 - sum(below_one & is_pos) / n_pos

  data.frame(fpr = fpr, tpr = tpr)
}

# Collect the per-(model, race) ROC tables and bind them once at the end.
roc_parts <- vector("list", length(pred) * length(eth))
for (j in seq_along(pred)) {
  for (k in seq_along(eth)) {
    print(paste(names(pred)[j], eth[k]))
    df.temp <- pred[[j]]
    preds   <- df.temp[[paste("pred", eth[k], sep = ".")]] # score column, e.g. pred.whi
    truth   <- df.temp[[SR[k]]]                            # 0/1 truth column, e.g. SR.WHI

    roc.temp <- roc_points(preds, truth, cut)
    roc.temp$model <- names(pred)[j]
    roc.temp$race  <- eth[k]
    roc_parts[[(j - 1) * length(eth) + k]] <- roc.temp

    ## Area under the curve from the ROCR package
    AUC[j, eth[k]] <- attributes(performance(prediction(preds, truth), "auc"))$y.values[[1]]
  }
}
ROC <- bind_rows(roc_parts)

## Figure 2

## Format AUC for Plot Area
# Each AUC becomes a two-decimal string with the leading zero dropped
# (0.93 -> ".93"). The pattern is anchored and the dot escaped so only a
# leading "0." is touched: an unanchored "0." also matches a zero followed by
# any character, which turns "0.05" into ".." and "1.00" into "1..".
AUC.form <- AUC
AUC.form[eth] <- lapply(
  AUC[eth],
  function(x) sub("^0\\.", ".", sprintf("%.2f", x))
)

# Attach display labels to the ROC table.
#   m    : model label as an ordered factor (any unlisted model becomes 'Hybrid')
#   tpr  : points with fpr == 0 and tpr == 1 are reset to tpr = 0
#   race : race code replaced by its display name (any unlisted code becomes
#          'Latino/Hispanic')
# NOTE(review): the tpr reset cannot tell a placeholder (0, 1) point from a
# genuinely perfect threshold; confirm that this is intended.
ROC <- ROC %>%
  mutate(
    m = factor(
      case_when(
        is.na(model)        ~ NA_character_,
        model == 'df.s'     ~ 'BSO',
        model == 'df.gs'    ~ 'BISG',
        model == 'df.im'    ~ 'Image',
        model == 'df.lstm'  ~ 'LSTM',
        model == 'df.fbisg' ~ 'fBISG',
        TRUE                ~ 'Hybrid'
      ),
      levels = c('BSO', 'BISG', 'fBISG', 'LSTM', 'Image', 'Hybrid')
    ),
    tpr = ifelse(fpr == 0 & tpr == 1, 0, tpr),
    race = case_when(
      is.na(race)   ~ NA_character_,
      race == 'asi' ~ 'Asian',
      race == 'bla' ~ 'Black',
      race == 'whi' ~ 'White',
      TRUE          ~ 'Latino/Hispanic'
    )
  )
unique(ROC$m)

# Label each AUC row with its model display name. This relies on the models
# appearing in ROC in the same order as the rows of AUC.form.
AUC.form$m <- unique(ROC$m)

# One annotation per facet: a "<model>: <AUC>" line for every model, joined by
# newlines. The `m` column only supplies a value for the plot's group aesthetic.
text <- data.frame(
  race  = c('White', 'Black', 'Latino/Hispanic', 'Asian'),
  label = vapply(
    c('whi', 'bla', 'his', 'asi'),
    function(code) paste(paste0(AUC.form$m, ': ', AUC.form[[code]]), collapse = '\n'),
    character(1),
    USE.NAMES = FALSE
  ),
  m     = unique(ROC$m)[1:4]
)

# Shared look for the ROC figure: black-and-white base theme, boxed legend at
# the bottom, no grid lines, size-8 text for legend, axis titles, plot title
# and facet strips, and x tick labels rotated 45 degrees.
# NOTE(review): recent ggplot2 releases may warn that `size` on element_rect is
# deprecated in favour of `linewidth`; left as is to avoid changing behaviour.
theme_roc <- local({
  small_text <- element_text(size = 8)

  theme_bw() +
    theme(
      legend.position   = "bottom",
      legend.title      = element_blank(),
      legend.text       = small_text,
      legend.key        = element_blank(),
      legend.background = element_rect(fill = "white", colour = "black", size = 0.2),
      panel.grid.major  = element_blank(),
      panel.grid.minor  = element_blank(),
      axis.title.x      = small_text,
      axis.title.y      = small_text,
      axis.text.x       = element_text(angle = 45, hjust = 1),
      plot.title        = small_text,
      strip.text.x      = small_text
    )
})

# ROC curves, one facet per race and one line per model, with the per-model
# AUC values printed inside each facet.
pc <- ggplot(data = ROC, aes(x = fpr, y = tpr, group = m)) +
  geom_line(aes(linetype = m, color = m)) +
  facet_wrap(. ~ race, nrow = 2) +
  # The label position is the same in every facet, so it is given as scalar
  # constants rather than vectors sized to a fixed number of label rows.
  geom_text(data = text,
            mapping = aes(x = 0.8, y = 0.25, label = label, group = m),
            size = 2, hjust = 0) +
  labs(x = 'False Positive Rate', y = 'True Positive Rate') +
  scale_color_manual(name = "", values = c('#38761d', '#55b32c', '#234912', 'gray45', 'gray70', 'red')) +
  scale_linetype_manual(name = "", values = c('longdash', 'dashed', 'dotdash', 'twodash', 'dotted', 'solid')) +
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
  guides(color = guide_legend(nrow = 1, byrow = TRUE)) +
  theme_roc

ggsave('Figures/Figure 2.png', pc, width = 5, height = 5, dpi = 600)



